import re
import requests
import random


class Spider:
    """Fetch web pages and pull fields out of them with regular expressions."""

    # Browser-like User-Agent sent with every request.
    _HEADERS = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36',
    }

    def get_html(self, url, encoding="utf-8", timeout=10):
        """Download *url* and return the response body as text.

        Args:
            url: Page to fetch, e.g. a table-of-contents page such as
                https://www.x23us.com/html/65/65650/
            encoding: Character encoding used to decode the body. Pass
                ``None`` to keep whatever encoding ``requests`` derives
                from the response headers.
            timeout: Seconds to wait for the server before giving up
                (forwarded to ``requests.get``).

        Returns:
            The decoded response body.

        Raises:
            requests.exceptions.RequestException: On connection failure
                or when the timeout expires.
        """
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, headers=self._HEADERS, timeout=timeout)
        print(response.status_code)

        # Honor the caller's encoding; it must be set before reading .text.
        if encoding:
            response.encoding = encoding
        return response.text

    def get_info(self, url, **regexs):
        """Fetch *url* and apply every named regex to its HTML.

        Args:
            url: Page to fetch.
            **regexs: ``field_name=pattern`` pairs; each pattern is passed
                to ``re.findall`` against the page text.

        Returns:
            A dict ``{field_name: [matches, ...], ...}`` with one entry per
            supplied pattern (empty when no patterns are given).
        """
        html = self.get_html(url)
        return {
            name: re.findall(pattern, html)
            for name, pattern in regexs.items()
        }
